* ---------------------------------------------------------------------------
* Treated sample: rows with HS==1 and ABS==1, restricted to 3 <= d <= 6,
* together with the firm id attached to the row.
* Output: tempfile `treated' holding anon, t, d and vallazon1.
* ---------------------------------------------------------------------------
use "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta", clear

* NONEMP: 1 if ABS==1, 0 if ABS==0, missing for any other value of ABS
gen NONEMP = 1 if ABS==1
	recode NONEMP (.=0) if ABS==0
	

* treated: 1 if HS==1 and NONEMP==1, 0 in every other case
gen treated = 1 if HS==1 & (NONEMP==1)
	recode treated (.=0)
	
keep if d>=3 & d<=6
keep if NONEMP !=.

keep if treated==1

* Firm id: vallazon1_l0, falling back on vallazon1_l1 when the former is missing.
* Stored as double on purpose: a plain -gen- creates a float, which cannot
* represent integers above 16,777,216 exactly and would silently corrupt the
* firm id that is used as a merge key further down.
gen double vallazon1=vallazon1_l0
replace vallazon1=vallazon1_l1 if vallazon1==.

keep anon t d vallazon1

tempfile treated
save `treated'
*HS months of treated people (HS=1 & ABS=1) with firm id they work at and length of absence

* ---------------------------------------------------------------------------
* Firm-month list: one row per (vallazon1, t) that appears in `treated',
* with the distinct d values of that firm-month spread over d1, d2, ...
* Output: tempfile `treated_firms'.
* ---------------------------------------------------------------------------
use `treated', clear
drop anon
duplicates drop
* Number the distinct d values within each firm-month in ascending order of d.
* -bysort ... (d)- states the ordering explicitly, so d1 < d2 < ... always
* holds; the previous -egen seq(), by()- relied on an earlier -sort- still
* being in effect inside egen, which may re-sort the data.
bysort vallazon1 t (d): gen c = _n
reshape wide d, i(vallazon1 t) j(c)
tempfile treated_firms
save `treated_firms'
*firm-months in which an employee of the firm gets a HS with subsequent absence

* ---------------------------------------------------------------------------
* Person list: every distinct anon present in the base shock data.
* Output: tempfile `treated_people' (single variable: anon).
* ---------------------------------------------------------------------------
use anon using "$out/base_shock_data_w_postemp_info_bnoSext_2022Sept.dta", clear
duplicates drop
tempfile treated_people
save `treated_people'
*list of people who ever get a HS (both w/wo absence)

* ---------------------------------------------------------------------------
* Candidate controls: start from the anon-t panel, attach firm id and basic
* covariates, flag the months in which the person's firm has a treated
* employee, and discard everyone who appears in the treated-people list.
* ---------------------------------------------------------------------------
use "$out/HS0temp1_bnoSext_2022Sept.dta", clear

merge 1:1 anon t using "$in/admin3_alap.dta", ///
	keepusing(vallazon1 ev w1 wh1 kor ferfi fogvisz1) keep(master match) nogen

* keep only people who, in at least one month, are attached to a firm that has
* a treated employee in that same month; event_time marks those months
merge m:1 vallazon1 t using `treated_firms'
gen byte event_time = (_merge == 3)
drop _merge
egen byte any_event = max(event_time), by(anon)
keep if any_event == 1
drop any_event

* drop all those who were ever treated (keep unmatched master rows only)
merge m:1 anon using `treated_people', keep(master) nogen

* ---------------------------------------------------------------------------
* Employment indicator and the usable d values per row.
*   EMP        = 1 when the row has a firm id, positive w1 or wh1,
*                SLdays below 6 (or missing) and fogvisz1 == 201
*   EMPin_dX   = 1 when EMP == 1 X months ahead (X = 3..6)
*   dEMP1..3   = the d values among d1..d3 at which the person is employed
*                at t+d, filled in increasing order (dEMP1 < dEMP2 < dEMP3)
* ---------------------------------------------------------------------------
merge 1:1 anon t using "$in/admin3_passziv.dta", keep(master match) nogen keepusing(passziv_tip1)
rename passziv_tip1 SLdays

* employment at the time of the shock
gen EMP = 1 if (fogvisz1==201) & (vallazon1!=.) & (SLdays<6 | SLdays==.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0))

* employment in each of the potential d-s
* NOTE(review): the f#. lead operator needs the data to be tsset/xtset; no such
* declaration is visible in this section - presumably it is stored with the
* input dataset. Confirm.
forvalues lead = 3/6 {
	gen EMPin_d`lead' = (f`lead'.EMP == 1)
}

* NOTE(review): the loops below are hard-coded to d1..d3, while d can take four
* values (3..6); a d4, if it exists in the data, is ignored here. Confirm that
* no firm-month has four distinct d values.
forvalues rank = 1/3 {
	gen dEMP`rank' = .
	local prev = `rank' - 1
	forvalues slot = `rank'/3 {
		forvalues len = 3/6 {
			* from the second rank on, only accept values above the previous rank
			local after
			if `rank' > 1 local after "& dEMP`prev'<`len'"
			replace dEMP`rank' = `len' if dEMP`rank'==. & d`slot'==`len' & EMPin_d`len'==1 `after'
		}
	}
}


* ---------------------------------------------------------------------------
* Side output: broad list of potential controls. One row per event month of
* an employed person with at least one usable d, holding anon, t, firm id,
* the usable d values and the number of such rows per person.
* The data in memory are left untouched (preserve / restore).
* ---------------------------------------------------------------------------
preserve
drop if event_time!=1 | EMP!=1 | dEMP1==.
keep dEMP? vallazon1 t anon
rename dEMP? d?
egen countevent = count(t), by(anon)
summarize

save "$out/potential_controls_for_within_firm_match_broad_list_2023Dec.dta", replace


codebook anon
restore

* ---------------------------------------------------------------------------
* Main sample: same row selection as the broad list, now applied to the data
* in memory. The original d1.. are replaced by the usable ones (dEMP1..).
* ---------------------------------------------------------------------------
drop if event_time!=1 | EMP!=1 | dEMP1==.
drop EMPin* d? EMP
rename dEMP? d?
egen countevent = count(t), by(anon)
keep anon t vallazon1 d? countevent ferfi kor 
label variable countevent "N. of times the same anon can be a potential within-firm matched control"

* Variables added from here on
*   outcomes: lnw1_restr FEj - in relative_time 0,1,2,3
*   controls: kor occupation teaor1_h1 ferfi mean_lag_w log_health_12mo A_12mo-V_12mo FEi_imp flag_missingFEi size_pre_imp FO_pre_imp logmean_w_pre_imp flag_missingsize_pre flag_missingFO_pre flag_missinglogmean_w_pre	flag_missingFEj_l4 - FEj_l12_imp
*   one row is an anon-t-d triplet

* person-level variable anon_fix from the AKM file, stored as FEi;
* rows that exist only in the AKM file are discarded after tabulating the result
merge m:1 anon using "$out_AKM/FEi_full_Jan2023.dta"
tabulate _merge
drop if _merge==2
drop _merge
rename anon_fix FEi

* step 1: add the t-related

* ---------------------------------------------------------------------------
* Month-t covariates: occupation group (from feor1_h2), teaor1_h1 and the
* firm-year variables size_pre, FO_pre and logmean_w_pre.
* ---------------------------------------------------------------------------
merge 1:1 anon t using "$in/admin3_alap.dta", keepusing(feor1_h2 teaor1_h1) keep(master match) nogen

* occupation: 0 when feor1_h2 is missing; codes not listed below stay missing
gen occupation = .
replace occupation = 0 if missing(feor1_h2)
replace occupation = 1 if inlist(feor1_h2, 14, 15)
replace occupation = 3 if feor1_h2==11 | inrange(feor1_h2, 17, 23)
replace occupation = 4 if feor1_h2==12 | inrange(feor1_h2, 24, 33)
replace occupation = 5 if inrange(feor1_h2, 34, 43)
replace occupation = 6 if inrange(feor1_h2, 44, 47)
replace occupation = 7 if inlist(feor1_h2, 13, 48)
drop feor1_h2

* firm-year characteristics; ev is 2003 for t = 1..12, 2004 for t = 13..24, ...
rename vallazon1 vallazon
gen ev = int((t-1)/12) + 2003
merge m:1 vallazon ev using "$out/firmquality.dta", keepusing(letszam FO mean_firmw) keep(master match) nogen
rename (letszam FO) (size_pre FO_pre)
gen logmean_w_pre = log(mean_firmw)
drop mean_firmw

* add l1-l3 values of restricted wage and firm FE
*
* Each row is expanded to 4 copies covering months t_merge-3 .. t_merge.
* Wage, hours and firm FE are attached for each of those months and the
* averages over the three months before t_merge are stored in
* lnw1_restr_t, lnw1_wins_restr_t and FEj_t. Only the t_merge row is kept.

* n identifies the original row in every by(n) below and in later sections.
* Stored as long: a default float id is not unique beyond 16,777,216 rows.
gen long n=_n
expand 4
rename t t_merge
egen t=seq(), by(n)
replace t=t-4
replace t=t +t_merge

merge m:1 anon t using "$in/admin3_alap.dta", keep(master match) nogen keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny fogvisz1)
* vallazon_t keeps the firm id of month t_merge; vallazon is the firm id of month t
rename vallazon vallazon_t
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keep(master match) nogen keepusing(vall_fix)
rename vall_fix FEj
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogen
rename passziv_tip1 SLdays
replace ev=2003+int((t-1)/12)

* same employment definition as at the time of the shock
gen EMP=1 if (vallazon!=.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0)) & (SLdays<6 | SLdays==.) & (fogvisz1==201)

* create a winsorized version of the wage variable, only for the EMP=1
* (1st and 99th percentiles are taken within month t over the rows in memory)
gen w1_wins=w1
egen wp1=pctile(w1) if EMP==1, by(t) p(1)
egen wp99=pctile(w1) if EMP==1, by(t) p(99)
replace w1_wins=wp1 if w1_wins<wp1 & w1_wins!=0 & EMP==1
replace w1_wins=wp99 if w1_wins>wp99 & w1_wins!=. & EMP==1
drop wp1 wp99

* deflator by year ev, 2003 = 1; years outside 2003-2017 get a missing deflator
gen defl=1 if ev==2003
	replace defl = 1.067991144 if ev==2004
	replace defl = 1.106443719 if ev==2005
	replace defl = 1.149586344 if ev==2006
	replace defl = 1.241552086 if ev==2007
	replace defl = 1.317292007 if ev==2008
	replace defl = 1.372611279 if ev==2009
	replace defl = 1.439874155 if ev==2010
	replace defl = 1.496329527 if ev==2011
	replace defl = 1.581158238 if ev==2012
	replace defl = 1.608512002 if ev==2013
	replace defl = 1.604987182 if ev==2014
	replace defl = 1.603851084 if ev==2015
	replace defl = 1.610259846 if ev==2016
	replace defl = 1.648100676 if ev==2017

* deflate the wage variable	
gen w1_defl=w1/defl
gen w1_wins_defl=w1_wins/defl

* deflated wage and hours worked changed to missing if EMP!=1 that period

replace w1_defl=. if EMP!=1
replace w1_wins_defl=. if EMP!=1
replace wh1=. if EMP!=1 

* censoring hours worked to the [20, 40] range (missing hours stay missing)
replace wh1=40 if wh1>40 & wh1!=.
replace wh1=20 if wh1<20	
	
* log of deflated wage divided by wh1*30/7; left missing when
* transfer_mtp is 401 or transfer_ny is 203
gen lnw1_restr=ln(w1_defl/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
gen lnw1_wins_restr=ln(w1_wins_defl/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203

* mean over the months before t_merge, copied to every row of the same n
foreach X in lnw1_restr lnw1_wins_restr FEj{
	egen `X'_t_=mean(`X') if t<t_merge, by(n)
	egen `X'_t=min(`X'_t_), by(n)
	drop `X'_t_
}	
keep if t==t_merge
drop t vallazon w1 w1_wins wh1 transfer_ny transfer_mtp FEj SLdays lnw1_restr lnw1_wins_restr w1_defl w1_wins_defl fogvisz1 EMP

* add l4-l12 values of lnw1_restr and FEj
*
* Each row (identified by n) is expanded to 10 copies: month t_merge itself
* and months t_merge-12 .. t_merge-4. Results:
*   mean_lag_w, mean_lag_w_wins : mean log wage over the lagged months
*   FEj_l#_imp, flag_missingFEj_l# : firm FE per lag # (mean-imputed) and
*                                    its missing flag, in wide form
*   *_imp / flag_missing* for size_pre, FO_pre, logmean_w_pre and FEi

expand 10
egen t = seq(), by(n)
* copy 1 stays at t_merge, copies 2..10 become offsets -12..-4
replace t = cond(t==1, 0, t-14) + t_merge

merge m:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny fogvisz1) keep(master match) nogen
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keepusing(vall_fix) keep(master match) nogen
rename vall_fix FEj
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogen
rename passziv_tip1 SLdays
replace ev = int((t-1)/12) + 2003

gen EMP = 1 if (fogvisz1==201) & (vallazon!=.) & (SLdays<6 | SLdays==.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0))

* winsorized wage: clip at the within-month 1st / 99th percentile, EMP==1 rows only
gen w1_wins = w1
tempvar p01 p99
egen `p01' = pctile(w1) if EMP==1, by(t) p(1)
egen `p99' = pctile(w1) if EMP==1, by(t) p(99)
replace w1_wins = `p01' if EMP==1 & w1_wins!=0 & w1_wins<`p01'
replace w1_wins = `p99' if EMP==1 & w1_wins!=. & w1_wins>`p99'
drop `p01' `p99'

* refresh the existing deflator for the lagged year (values listed for 2003..2017);
* rows whose ev is outside that range keep the value they already had
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev == `yr'
	local ++yr
}

* hours limited to the [20, 40] range
replace wh1 = 40 if wh1!=. & wh1>40
replace wh1 = 20 if wh1<20

* log deflated wage divided by wh1*30/7; missing unless EMP==1 and the
* transfer codes differ from 401 / 203
foreach w in w1 w1_wins {
	gen ln`w'_restr = log((`w'/defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203 & EMP==1
}
drop EMP

* mean over the months before t_merge, spread to every row of the same n
foreach src in lnw1_restr lnw1_wins_restr {
	local out = cond("`src'"=="lnw1_restr", "mean_lag_w", "mean_lag_w_wins")
	tempvar prior
	egen `prior' = mean(`src') if t<t_merge, by(n)
	egen `out' = min(`prior'), by(n)
	drop `prior'
}
drop w1 w1_wins wh1 transfer_ny transfer_mtp lnw1_restr lnw1_wins_restr vallazon fogvisz1 SLdays

* firm FE: missing flag plus a copy with missings set to the sample mean
gen flag_missingFEj_l = (FEj == .)
gen FEj_imp_l = FEj
summarize FEj
replace FEj_imp_l = `r(mean)' if flag_missingFEj_l == 1
drop FEj

* back to one row per n, one pair of columns per lag (time = 0, 4, ..., 12)
gen time = t_merge - t
drop ev defl t
reshape wide flag_missingFEj_l FEj_imp_l, i(n) j(time)
rename FEj_imp_l* FEj_l*_imp

* same flag-and-mean-impute treatment for the remaining covariates
foreach v of varlist size_pre FO_pre logmean_w_pre FEi {
	gen flag_missing`v' = (`v' == .)
	gen `v'_imp = `v'
	summarize `v'
	replace `v'_imp = `r(mean)' if flag_missing`v' == 1
	drop `v'
}

* add l3-l12 values of health-related variables
*
* Each row (identified by n) is expanded to 11 copies: month t_merge itself
* and months t_merge-12 .. t_merge-3. Results:
*   log_health_12mo : log of the summed fekvo_ft, jaro_ft_ossz, tbtam_ossz and
*                     betegft_ossz over the lagged months (0 when the log is
*                     undefined)
*   A_12mo .. V_12mo: 1 if the corresponding *_ft variable is positive in any
*                     lagged month, 0 otherwise
* The finished month-t dataset is stored in tempfile `temp2merge'.
	
expand 11
egen t=seq(), by(n)
* copy 1 stays at t_merge, copies 2..11 become offsets -12..-3
replace t=t-14 if t!=1
replace t=0 if t==1
replace t=t +t_merge

merge m:1 anon t using "$in/admin3_eu_fekvo.dta", nogen keepusing(fekvo_ft) keep(master match)
merge m:1 anon t using "$in/admin3_eu_jaro.dta", nogen keepusing(jaro_ft_ossz) keep(master match)
merge m:1 anon t using "$in/admin3_eu_veny.dta", nogen keepusing(tbtam_ossz betegft_ossz) keep(master match)
merge m:1 anon t using "$in_med/admin3_eu_veny_H2_v2.dta", nogen keepusing(A_ft B_ft C_ft D_ft G_ft H_ft J_ft L_ft M_ft N_ft P_ft R_ft S_ft V_ft) keep(master match)

* unmatched or missing amounts count as zero
foreach var of varlist fekvo_ft jaro_ft_ossz tbtam_ossz betegft_ossz {
	replace `var'=0 if `var'==.
}
gen health = fekvo_ft + jaro_ft_ossz + tbtam_ossz + betegft_ossz
* total() is the documented egen function for group sums (sum() is its legacy name)
egen health_12mo_=total(health) if t<t_merge, by(n)
egen health_12mo=min(health_12mo_), by(n)
gen log_health_12mo=log(health_12mo)
* log() is missing for a zero total; such rows are set to 0
replace log_health_12mo=0 if log_health_12mo==.
drop health_12mo health_12mo_ fekvo_ft jaro_ft_ossz tbtam_ossz betegft_ossz
foreach var in A B C D G H J L M N P R S V {
	* turn the amount into a 0/1 indicator, then take the maximum over the lagged months
	replace `var'_ft=0 if `var'_ft==.
	replace `var'_ft=1 if `var'_ft>0 & `var'_ft~=.
	egen `var'_12mo_=max(`var'_ft) if t<t_merge, by(n)
	egen `var'_12mo=min(`var'_12mo_), by(n)
	drop `var'_12mo_ `var'_ft
}
			
keep if t==t_merge

* the lag-0 firm FE columns are not used as controls
drop t n flag_missingFEj_l0 FEj_l0_imp health
rename t_merge t

*save "$out/potential_controls_for_within_firm_match_temp_2023Dec.dta", replace
tempfile temp2merge
save `temp2merge'

* step 2: add the t+d-related
*
* The data are reshaped to one row per anon-t-d triplet. For each triplet the
* wage and firm FE are collected at month t+d and at t+d+6, t+d+8, ..., t+d+30:
*   *_t1 : value at t+d
*   *_t2 : mean over offsets 6..18
*   *_t3 : mean over offsets 18..30
* The month-t variables saved in `temp2merge' are merged back at the end and
* the final dataset is written to disk.

* one row is an anon-t-d triplet 
keep anon t d*
reshape long d, i(anon t) j(num)
drop if d==.
drop num

* n identifies the anon-t-d triplet in every by(n) below.
* Stored as long: a default float id is not unique beyond 16,777,216 rows.
gen long n=_n
expand 14
rename t t_merge
egen t=seq(), by(n)
* copy 1 gets offset 0, copies 2..14 get offsets 6, 8, ..., 30
replace t=t*2+2 if t!=1
replace t=0 if t==1
replace t=t_merge+d+t

merge m:1 anon t using "$in/admin3_alap.dta", keep(master match) nogen keepusing(vallazon1 fogvisz1 w1 wh1 transfer_mtp transfer_ny)
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogen
rename passziv_tip1 SLdays

* same employment definition as at the time of the shock
gen EMP = 1 if (vallazon1!=.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0)) & (SLdays<6 | SLdays==.) & (fogvisz1==201)

* create a winsorized version of the wage variable, only for the EMP=1
* (1st and 99th percentiles are taken within month t over the rows in memory)
gen w1_wins=w1
egen wp1=pctile(w1) if EMP==1, by(t) p(1)
egen wp99=pctile(w1) if EMP==1, by(t) p(99)
replace w1_wins=wp1 if w1_wins<wp1 & w1_wins!=0 & EMP==1
replace w1_wins=wp99 if w1_wins>wp99 & w1_wins!=. & EMP==1
drop wp1 wp99

* deflator by year ev, 2003 = 1; years outside 2003-2017 get a missing deflator
gen ev=2003+int((t-1)/12)
gen defl=1 if ev==2003
	replace defl = 1.067991144 if ev==2004
	replace defl = 1.106443719 if ev==2005
	replace defl = 1.149586344 if ev==2006
	replace defl = 1.241552086 if ev==2007
	replace defl = 1.317292007 if ev==2008
	replace defl = 1.372611279 if ev==2009
	replace defl = 1.439874155 if ev==2010
	replace defl = 1.496329527 if ev==2011
	replace defl = 1.581158238 if ev==2012
	replace defl = 1.608512002 if ev==2013
	replace defl = 1.604987182 if ev==2014
	replace defl = 1.603851084 if ev==2015
	replace defl = 1.610259846 if ev==2016
	replace defl = 1.648100676 if ev==2017
gen w1_defl=w1/defl
gen w1_wins_defl=w1_wins/defl

* wage and hours are set to missing when not employed; hours limited to [20, 40]
replace w1_defl=. if EMP!=1
replace w1_wins_defl=. if EMP!=1
replace wh1=. if EMP!=1 
replace wh1=40 if wh1>40 & wh1!=.
replace wh1=20 if wh1<20
* log of deflated wage divided by wh1*30/7; left missing when
* transfer_mtp is 401 or transfer_ny is 203
gen lnw1_restr=ln((w1_defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
gen lnw1_wins_restr=ln((w1_wins_defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keep(master match) nogen keepusing(vall_fix)
rename vall_fix FEj

* months elapsed since t+d
gen rel_time=t-d-t_merge

* window means, copied to every row of the same n
* NOTE(review): both windows include rel_time == 18 - confirm this overlap is intended
foreach X in lnw1_restr lnw1_wins_restr FEj{
	egen `X'_td1y_=mean(`X') if rel_time>=6 & rel_time<=18, by(n)
	egen `X'_td1y=min(`X'_td1y_), by(n)
	drop `X'_td1y_
	egen `X'_td2y_=mean(`X') if rel_time>=18 & rel_time<=30, by(n)
	egen `X'_td2y=min(`X'_td2y_), by(n)
	drop `X'_td2y_
}

keep if rel_time==0

rename lnw1_restr lnw1_restr_t1
rename lnw1_wins_restr lnw1_wins_restr_t1
rename FEj FEj_t1
rename *_td1y *_t2
rename *_td2y *_t3

drop n t vallazon w1 w1_wins wh1 fogvisz1 transfer_ny transfer_mtp SLdays ev defl EMP rel_time w1_defl w1_wins_defl
rename t_merge t

* attach the month-t variables; d? removes d1, d2, ... coming from `temp2merge'
* and leaves the long-form d of the triplet in place
merge m:1 anon t using `temp2merge'
drop _merge d?
sum

rename vallazon_t vallazon1

save "$out/potential_controls_for_within_firm_match_2023Dec.dta", replace

